{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "5f1c83e5",
   "metadata": {},
   "source": [
    "<a href=\"https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/query_engine/JointQASummary.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "68490dba",
   "metadata": {},
   "source": [
    "# Joint QA Summary Query Engine"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fec9bc5a",
   "metadata": {},
   "source": [
    "If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fcb87406",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install llama-index-llms-openai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a2e339d9",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install llama-index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a54d1c43-4b7f-4917-939f-a964f6f3dafc",
   "metadata": {},
   "outputs": [],
   "source": [
    "import nest_asyncio\n",
    "\n",
    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fa67fa07-1395-4aab-a356-72bdb302f6b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import sys\n",
    "\n",
    "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
    "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "9efa3b50",
   "metadata": {},
   "source": [
    "## Download Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1f9f1203",
   "metadata": {},
   "outputs": [],
   "source": [
    "!mkdir -p 'data/paul_graham/'\n",
    "!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "0e307a15",
   "metadata": {},
   "source": [
    "## Load Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e7cdaf9d-cfbd-4ced-8d4e-6eef8508224d",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core import SimpleDirectoryReader\n",
    "\n",
    "reader = SimpleDirectoryReader(\"./data/paul_graham/\")\n",
    "documents = reader.load_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9bba68f3-2743-437e-93b6-ce9ba92e40c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.llms.openai import OpenAI\n",
    "\n",
    "gpt4 = OpenAI(temperature=0, model=\"gpt-4\")\n",
    "\n",
    "chatgpt = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "16216dfb-35ea-49ac-b651-2e8a9e423512",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
      "> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 20729 tokens\n",
      "> [build_index_from_nodes] Total embedding token usage: 20729 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
      "> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 0 tokens\n",
      "> [build_index_from_nodes] Total embedding token usage: 0 tokens\n"
     ]
    }
   ],
   "source": [
    "from llama_index.core.composability import QASummaryQueryEngineBuilder\n",
    "\n",
    "# NOTE: can also specify an existing docstore, summary text, qa_text, etc.\n",
    "query_engine_builder = QASummaryQueryEngineBuilder(\n",
    "    llm=gpt4,\n",
    ")\n",
    "query_engine = query_engine_builder.build_from_documents(documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae60000b-403c-4350-af32-71e26cc68a75",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.query_engine.router_query_engine:Selecting query engine 1 because: This choice is relevant because it is specifically for summarization queries, which matches the request for a summary of the author's life..\n",
      "Selecting query engine 1 because: This choice is relevant because it is specifically for summarization queries, which matches the request for a summary of the author's life..\n",
      "INFO:llama_index.indices.common_tree.base:> Building index from nodes: 6 chunks\n",
      "> Building index from nodes: 6 chunks\n",
      "INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 1012 tokens\n",
      "> [get_response] Total LLM token usage: 1012 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens\n",
      "> [get_response] Total embedding token usage: 0 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 23485 tokens\n",
      "> [get_response] Total LLM token usage: 23485 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens\n",
      "> [get_response] Total embedding token usage: 0 tokens\n"
     ]
    }
   ],
   "source": [
    "response = query_engine.query(\n",
    "    \"Can you give me a summary of the author's life?\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4488669d-0f67-48c9-994c-bd7a42498ecb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.query_engine.router_query_engine:Selecting query engine 0 because: This choice is relevant because it involves retrieving specific context from documents, which is needed to answer the question about the author's activities growing up..\n",
      "Selecting query engine 0 because: This choice is relevant because it involves retrieving specific context from documents, which is needed to answer the question about the author's activities growing up..\n",
      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens\n",
      "> [retrieve] Total LLM token usage: 0 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 8 tokens\n",
      "> [retrieve] Total embedding token usage: 8 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 1893 tokens\n",
      "> [get_response] Total LLM token usage: 1893 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens\n",
      "> [get_response] Total embedding token usage: 0 tokens\n"
     ]
    }
   ],
   "source": [
    "response = query_engine.query(\n",
    "    \"What did the author do growing up?\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ff95db5f-7cbe-4ed7-83ff-27e00b94e7da",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.query_engine.router_query_engine:Selecting query engine 0 because: This choice is relevant because it involves retrieving specific context from documents, which is needed to answer the question about the author's activities in art school..\n",
      "Selecting query engine 0 because: This choice is relevant because it involves retrieving specific context from documents, which is needed to answer the question about the author's activities in art school..\n",
      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens\n",
      "> [retrieve] Total LLM token usage: 0 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 12 tokens\n",
      "> [retrieve] Total embedding token usage: 12 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 1883 tokens\n",
      "> [get_response] Total LLM token usage: 1883 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens\n",
      "> [get_response] Total embedding token usage: 0 tokens\n"
     ]
    }
   ],
   "source": [
    "response = query_engine.query(\n",
    "    \"What did the author do during his time in art school?\",\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llama",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
